from sklearn.model_selection import train_test_split
import pandas as pd

# Build a smaller, class-balanced sample of the cardio dataset and write it
# out as a cleaned CSV.
SAMPLE_SIZE = 8000
SOURCE_CSV = 'data/cardio_train.csv'
SAMPLED_CSV = 'data/cardio_sampled.csv'

df = pd.read_csv(SOURCE_CSV, delimiter=';')

# Use the "test" side of a stratified split as the sample so the proportion
# of each 'cardio' class is preserved.  An integer test_size requests an
# exact row count; the previous float fraction (8000 / len(df)) is converted
# back to a count with a ceiling and can round up to 8001 rows.
_, sampled_df = train_test_split(
    df,
    test_size=SAMPLE_SIZE,
    stratify=df['cardio'],
    random_state=42,
)

# Work on an independent copy: the split result is derived from `df`, and
# modifying it directly can raise pandas' SettingWithCopyWarning.
sampled_df = sampled_df.copy()

# Rescale age by the mean length of a year in days.
# NOTE(review): presumably 'age' is stored in days in the source file - confirm.
sampled_df['age'] = sampled_df['age'] / 365.25

# The row identifier is not needed in the sampled output.
sampled_df = sampled_df.drop(columns=['id'])

# Shift the gender codes down by one.
# NOTE(review): presumably the raw codes are 1/2, giving 0/1 here - confirm.
sampled_df['gender'] -= 1

sampled_df.to_csv(SAMPLED_CSV, index=False)
print('done')